## Setting up the basics
library(ggplot2)
library(lme4)
## Loading required package: Matrix
library(stargazer)
##
## Please cite as:
## Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.1. https://CRAN.R-project.org/package=stargazer
library(knitr)
library(ggsignif)
library(plotrix)
folder = "~/Google Drive/Melbourne/UNIMELB/Research/Complexity Project/KS-IC/Code/Behavioural-Analysis"
setwd(folder)
knitr::opts_knit$set(root.dir = folder)
#Input Parameters
source(paste0(folder,"/Input/Behav_IC/V1.R"))
#Import functions
source("DescriptiveFunctions.R")
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
##
## Attaching package: 'reshape'
## The following object is masked from 'package:dplyr':
##
## rename
## The following object is masked from 'package:Matrix':
##
## expand
#Output type for tables. Use "html" to view the output .html file and use "latex" to export .tex tables
outputType="html"
#Stores a list of all the regressions that are run
allModels=vector("list", length=0)
#' Create the formatted table for statistical model export
#'
#' Side effect: stores the model (keyed by `outputName`) in the global
#' `allModels` list so every regression run in the script is kept.
#'
#' @param model model to be exported
#' @param title Caption shown at the top of the table
#' @param outputType "html" or "latex"
#' @param outputName The name of the output file (without extension)
#'
#' @return The text of the table
#' @export table Exports a .tex file to Output/Tables/ when outputType=="latex"
modelTableStyle = function(model,title,outputType,outputName){
print(outputName)
# Stores the model in the global list with all the models
allModels[[outputName]]<<-model
# Arguments shared by both output formats
sharedArgs = list(model, type=outputType, report=('vc*sp'),#(('vc*p'))
                  column.labels=c("Random Intercept"), dep.var.caption = title,
                  dep.var.labels.include = TRUE, model.names=FALSE, align=TRUE)
if (outputType=="html"){
# Very wide column separator so the html rendering is not cramped
table = do.call(stargazer, c(sharedArgs, list(column.sep.width = "5000pt")))
} else if (outputType=="latex"){
# Latex output is additionally written to a .tex file for the report
table = do.call(stargazer, c(sharedArgs, list(
  column.sep.width = "0pt",
  out = paste0("Output/Tables/",outputName,".tex"),
  table.placement="H")))
}
#return(table)
}
#' Exports the formatted plot as pdf for report
#'
#' The plot is printed twice on purpose: once to the active device (so it
#' appears in the knitted document) and once into the pdf file.
#'
#' @param plot The ggplot to be exported
#' @param outputName The name of the output file (without extension)
#'
#' @return The result of printing the plot into the pdf device
#' @export plot formatted plot as pdf to <folder>/Output/Figures/
plotExport = function(plot,outputName){
print(outputName)
print(plot)
path = "/Output/Figures/"
print(folder)  # `folder` is the global project root set at the top of the script
pdf(file=paste0(folder,path,outputName,".pdf"))
# Guarantee the pdf device is closed even if printing the plot errors out
on.exit(dev.off(), add = TRUE)
print(plot)
}
#' Export non-regression-based table to html or latex using stargazer
#'
#' @param table The table (data frame) to be formatted
#' @param title Caption shown at the top of the table
#' @param outputType "html" or "latex"
#' @param outputName The name of the output file (without extension)
#'
#' @return The formatted text of the table
#' @export table Exports a .tex file to Output/Tables/ when outputType=="latex"
exportTable = function(table,title,outputType,outputName){
print(outputName)
# Arguments shared by both formats; summary=FALSE prints the table as-is
sharedArgs = list(table, type=outputType, dep.var.caption = title, align=TRUE,
                  summary=FALSE, rownames = FALSE)
if (outputType=="html"){
table = do.call(stargazer, c(sharedArgs, list(column.sep.width = "5000pt")))
} else if (outputType=="latex"){
table = do.call(stargazer, c(sharedArgs, list(
  column.sep.width = "0pt",
  out = paste0("Output/Tables/",outputName,".tex"),
  table.placement="H")))
}
}
Knapsack Decision Variant
#Imports Decision Data
dataAllDec = importTrialInfo(folderDataDec,"dec")
#Adds Phase Transition Dummy Variable to data (1-> in Phase Transition / 0 -> Out of Phase Transition)
# NOTE(review): types 1-4 are treated as in-PT here — confirm against the instance design
dataAllDec$phaseT=as.numeric(dataAllDec$type<=4)
#Adds an experiment version column to the data based on "Participants Log.csv"
partLog0=read.csv(fileParticipants, stringsAsFactors = FALSE)
partLog=data.frame(pID=partLog0$Participant.ID, ExpVersion=partLog0$Experiment.Version,stringsAsFactors = FALSE)
dataAllDec=merge(dataAllDec,partLog, by="pID" )
#Filters to get only the relevant versions
dataAllDec=dplyr::filter(dataAllDec,ExpVersion %in% versions)
#Imports Algorithm-specific complexity measures (MZN propagations and SAT decisions)
decisionInstanceInfo= read.csv(fileDecInstances, sep=",", stringsAsFactors = FALSE)
# Standardise the column names so they match the names used in the analyses below
names(decisionInstanceInfo)[names(decisionInstanceInfo)=="propagations_MZN"]="propagations"
names(decisionInstanceInfo)[names(decisionInstanceInfo)=="decisions_SAT"]="decisions"
names(decisionInstanceInfo)[names(decisionInstanceInfo)=="problem"]="id"
decisionInstanceInfo=decisionInstanceInfo[,c('id','propagations','decisions')]
#Imports Algorithm-specific complexity measures (All instances with MZN(Gecode))
#decInstanceInfoAll = read.csv(fileDecInstancesAll, sep=",", stringsAsFactors = FALSE)
#Generates DataDecProp:= Decision data including ex-post complexity measures
dataDecProp=merge(dataAllDec,decisionInstanceInfo, by="id")
# Cleaning Decision Data:
# Filters out those trials in which an answer was not given (answer==2 codes a non-answer)
nOmitTrials = length(dataDecProp$answer[dataDecProp$answer==2])
NOmitPart= length(unique(dataDecProp$pID[dataDecProp$answer==2]))
dataDecProp=dataDecProp %>% filter(answer!=2)
print(paste(nOmitTrials,"Trials were omitted due to non-answers (from", NOmitPart,"Participants)."))
## [1] "13 Trials were omitted due to non-answers (from 8 Participants)."
#Filter additional participants.
#Filter participant with 55% accuracy?
participantsToOmit=c()#c("be16")
dataDecProp = dataDecProp %>% filter(!(pID %in% participantsToOmit))
print(paste0(length(participantsToOmit)," participants were omitted in the decision analysis."))
## [1] "0 participants were omitted in the decision analysis."
#Adds sum of weights and sum of values
dataDecProp=addSumOfValues(dataDecProp)
dataDecProp=addSumOfWeights(dataDecProp)
# Rename the approximate response-time column to the canonical name used below
names(dataDecProp)[names(dataDecProp)=="timeSpentAprox"]="timeSpent"
summaryListDec= summaryData(dataDecProp)
Descriptive Summary
print(paste('Participants To be analysed are:',paste(sort(unique(dataDecProp$pID)),collapse=" ")))
## [1] "Participants To be analysed are: be14 be15 be16 be17 be18 be19 be20 be21 be22 be23 be24 be25 be26 be27 be28 be29 be30 be31 be32 be33"
print(paste('Number of participants to analyse :',length(unique(dataDecProp$pID)) ) )
## [1] "Number of participants to analyse : 20"
print(paste("The overall Accuracy was: ",mean(dataDecProp$correct)))
## [1] "The overall Accuracy was: 0.831114225648213"
#Summary Stats for Decision Problem
dataInput=dataDecProp
accuracySummary = dataInput %>% group_by(pID) %>% summarise(acc=mean(correct)) %>% summarise(mean=mean(acc),min=min(acc),max=max(acc),SD=sd(acc))
kable(accuracySummary, digits=2, caption = "Accuracy Summary")
Accuracy Summary
| 0.83 |
0.56 |
0.9 |
0.08 |
answerSummary = dataInput %>% group_by(pID) %>% summarise(acc=mean(answer)) %>% summarise(mean=mean(acc),min=min(acc),max=max(acc),SD=sd(acc))
kable(answerSummary, digits=2, caption = "Proportion of times that YES was selected as the answer")
Proportion of times that YES was selected as the answer
| 0.48 |
0.31 |
0.6 |
0.06 |
yesNoProportions = dataInput %>% group_by(sol,pID) %>% summarise(acc=mean(correct)) %>% summarise(mean=mean(acc),min=min(acc),max=max(acc),SD=sd(acc))
kable(yesNoProportions, digits=2, caption = "Accuracy By Solution")
Accuracy By Solution
| 0 |
0.85 |
0.69 |
0.97 |
0.08 |
| 1 |
0.81 |
0.36 |
0.89 |
0.12 |
In and Out of Phase Transition Contrast
Accuracy Contrast (Computational Performance)
# Mean accuracy (and its standard error) in vs. out of the phase transition:
# first average within each participant, then across participants
dataInput=dataDecProp %>% group_by(phaseT,pID)%>%summarise(accuracy1=mean(correct))%>%ungroup()%>%group_by(phaseT)%>%summarise(accuracy=mean(accuracy1),se=se(accuracy1))%>%ungroup()
# Relabel the dummy for plotting (1 = in phase transition = high instance complexity)
dataInput$phaseT = recode(dataInput$phaseT, '0' = "Low IC", '1' = "High IC")
plo= ggplot(data=dataInput, aes(y=accuracy, x=as.factor(phaseT),label = round(accuracy,digits = 2))) +
geom_bar(stat = "identity")+
geom_errorbar(aes(ymin=accuracy-se, ymax=accuracy+se), width=.1)+
geom_signif(comparisons = list(c("Low IC","High IC")), annotations="***", y_position = 0.95, tip_length = 0.03)+
labs(title="Accuracy In and Out of phase Transition",x="Instance complexity",y="Accuracy")+
coord_cartesian(ylim = c(0.5,1))+
geom_text(hjust=0.5,vjust = 5, colour="white") +
theme_light()+
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),plot.title = element_text(hjust = 0.5))
outputName = "dec02g"
plotExport(plo,outputName)
## [1] "dec02g"
## [1] "~/Google Drive/Melbourne/UNIMELB/Research/Complexity Project/KS-IC/Code/Behavioural-Analysis"

## quartz_off_screen
## 2
#Anova contrast for In/Out Phase transition
anovaModel=aov(correct~phaseT+Error(pID/phaseT),dataDecProp)
anovaPValue=summary(anovaModel)[[2]][[1]][['Pr(>F)']][[1]]
print(paste("P-value for one way ANOVA:",signif(anovaPValue,digits=3)))
[1] “P-value for one way ANOVA: 3.58e-09”
rm(dataInput)
#Logistic Regression for In/Out Phase transition
#logistic with random effects (intercept)
logitRandomIntercept = glmer(correct ~ phaseT + (1|pID), family=binomial(link="logit"), data=dataDecProp)
title="Logistic regressions"
tableName='dec02r'
modelTableStyle(logitRandomIntercept,title,outputType,tableName)
[1] “dec02r”
|
|
|
|
Logistic regressions
|
|
|
|
|
|
correct
|
|
|
Random Intercept
|
|
|
|
phaseT
|
-1.327***
|
|
|
(0.161)
|
|
|
p = 0.000
|
|
|
|
|
Constant
|
2.451***
|
|
|
(0.167)
|
|
|
p = 0.000
|
|
|
|
|
|
|
Observations
|
1,427
|
|
Log Likelihood
|
-602.372
|
|
Akaike Inf. Crit.
|
1,210.744
|
|
Bayesian Inf. Crit.
|
1,226.534
|
|
|
|
Note:
|
p<0.1; p<0.05; p<0.01
|
Solvability and Phase Transition
Is the phase transition effect still significant regardless of whether the instance is solvable?
# Participant-level accuracy by solvability (sol) and phase-transition status,
# then averaged across participants with a standard error
dataInput=dataDecProp %>% group_by(phaseT,sol,pID)%>%summarise(accuracy1=mean(correct))%>%ungroup()%>%group_by(sol,phaseT)%>%summarise(accuracy=mean(accuracy1),se=se(accuracy1))%>%ungroup()
# Facet labels for sol; level `2` should never appear once non-answers are filtered
sol_names <- c(
`0` = "Correct answer: NO",
`1` = "Correct answer: YES",
`2` = "If this is here it means data not filtered"
)
plo=ggplot(data=dataInput, aes(y=accuracy, x=as.factor(as.logical(phaseT)), label = round(accuracy,digits = 2),group=1)) +
geom_line()+
geom_errorbar(aes(ymin=accuracy-se, ymax=accuracy+se), width=.1) +
geom_point(shape=21, size=3, fill="white")+
labs(title="Accuracy segregated by solvability",x="In Phase Transition?",y="Accuracy")+
theme(plot.title = element_text(hjust = 0.5), axis.text.x = element_text(angle=0,hjust=0.5))+
geom_text(hjust=-0.5, colour="black")+
facet_grid(.~sol, labeller = as_labeller(sol_names))
outputName = "dec03g"
plotExport(plo,outputName)
## [1] "dec03g"
## [1] "~/Google Drive/Melbourne/UNIMELB/Research/Complexity Project/KS-IC/Code/Behavioural-Analysis"

## quartz_off_screen
## 2
#logistic with random effects (intercept)
logitRandomIntercept = glmer(correct ~ phaseT + sol + phaseT:sol + (1|pID), family=binomial(link="logit"), data=dataDecProp)
title="Logistic regressions"
tableName='dec03r'
modelTableStyle(logitRandomIntercept,title,outputType,tableName)
[1] “dec03r”
|
|
|
|
Logistic regressions
|
|
|
|
|
|
correct
|
|
|
Random Intercept
|
|
|
|
phaseT
|
-1.285***
|
|
|
(0.240)
|
|
|
p = 0.00000
|
|
|
|
|
sol
|
-0.250
|
|
|
(0.271)
|
|
|
p = 0.355
|
|
|
|
|
phaseT:sol
|
-0.084
|
|
|
(0.323)
|
|
|
p = 0.796
|
|
|
|
|
Constant
|
2.584***
|
|
|
(0.224)
|
|
|
p = 0.000
|
|
|
|
|
|
|
Observations
|
1,427
|
|
Log Likelihood
|
-600.149
|
|
Akaike Inf. Crit.
|
1,210.299
|
|
Bayesian Inf. Crit.
|
1,236.615
|
|
|
|
Note:
|
p<0.1; p<0.05; p<0.01
|
Phase Transition effect by Region (Over-Under-InPT)
Is the phase transition effect significant regardless of region (Overconstrained/Underconstrained)?
# Collapse the six instance types into three regions: types 1-4 are in the
# phase transition, type 5 overconstrained, type 6 underconstrained
dataInput=dataDecProp %>% mutate(region=recode(type, `1`= 'Phase Transition', `2`= 'Phase Transition', `3`= 'Phase Transition', `4`='Phase Transition', '5'='Overconstrained', '6'='Underconstrained'))
# Participant-level means first, then the across-participant mean and SE per region
dataInput2=dataInput %>% group_by(region,pID)%>%summarise(accuracy1=mean(correct))%>%ungroup()%>%group_by(region)%>%summarise(accuracy=mean(accuracy1),se=se(accuracy1))%>%ungroup()
# Order the regions along the x axis by constrainedness
dataInput2$region <- factor(dataInput2$region, levels = c('Underconstrained',
'Phase Transition',
'Overconstrained'))
plo= ggplot(data=dataInput2, aes(y=accuracy, x=region, label = round(accuracy,digits = 2))) +
geom_bar(stat = "identity")+
# Manual significance bars: each out-of-PT region vs. the phase transition
geom_signif(annotations = c("***","***"),
y_position=c(0.95,0.95),xmin=c(1.1,2.1),xmax=c(1.9,2.9))+
geom_signif(comparisons = list(c("Underconstrained","Overconstrained")),
annotations="NS", y_position = 0.99, tip_length = 0.03)+
geom_errorbar(aes(ymin=accuracy-se, ymax=accuracy+se), width=.1)+
# geom_signif(comparisons = list(c("Phase Transition","Overconstrained")),
# annotations="***", y_position = 0.98, tip_length = 0.05)+
# geom_signif(comparisons = list(c("Phase Transition","Underconstrained")),
# annotations="***", y_position = 0.96, tip_length = 0.05,size=0.4)+
labs(title="Accuracy by region",x="Region",y="Accuracy")+
coord_cartesian(ylim = c(0.5,1))+
geom_text(hjust=0.5,vjust = 5, colour="white") +
theme_light()+
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),plot.title = element_text(hjust = 0.5))
outputName = "decB01g"
plotExport(plo,outputName)
## [1] "decB01g"
## [1] "~/Google Drive/Melbourne/UNIMELB/Research/Complexity Project/KS-IC/Code/Behavioural-Analysis"

## quartz_off_screen
## 2
#Stats test for time in and out of phase transition: ANOVA
dataInput$overConstrained= (dataInput$region=='Overconstrained')
dataInput$underConstrained= (dataInput$region=='Underconstrained')
# print("Anova between Overconstrained and Underconstrained")
# anovaModel=aov(correct~overConstrained+Error(pID/overConstrained),dataInput %>% filter(overConstrained | underConstrained))
# anovaPValue=summary(anovaModel)[[2]][[1]][['Pr(>F)']][[1]]
# print(paste("P-value for one way ANOVA:",signif(anovaPValue,digits=3)))
#
# print("Anova between Overconstrained and In-PhaseT")
# anovaModel=aov(correct~phaseT+Error(pID/phaseT),dataInput %>% filter(overConstrained | phaseT))
# anovaPValue=summary(anovaModel)[[2]][[1]][['Pr(>F)']][[1]]
# print(paste("P-value for one way ANOVA:",signif(anovaPValue,digits=3)))
#
# print("Anova between Underconstrained and In-PhaseT")
# anovaModel=aov(correct~phaseT+Error(pID/phaseT),dataInput %>% filter(underConstrained | phaseT))
# anovaPValue=summary(anovaModel)[[2]][[1]][['Pr(>F)']][[1]]
# print(paste("P-value for one way ANOVA:",signif(anovaPValue,digits=3)))
logitRandomIntercept = glmer(correct ~ overConstrained + underConstrained + (1|pID), family=binomial(link="logit"), data=dataInput)
title="Logistic regressions"
tableName="decB01r1"
modelTableStyle(logitRandomIntercept,title,outputType,tableName)
[1] “decB01r1”
|
|
|
|
Logistic regressions
|
|
|
|
|
|
correct
|
|
|
Random Intercept
|
|
|
|
overConstrained
|
1.459***
|
|
|
(0.220)
|
|
|
p = 0.000
|
|
|
|
|
underConstrained
|
1.208***
|
|
|
(0.202)
|
|
|
p = 0.000
|
|
|
|
|
Constant
|
1.125***
|
|
|
(0.130)
|
|
|
p = 0.000
|
|
|
|
|
|
|
Observations
|
1,427
|
|
Log Likelihood
|
-601.946
|
|
Akaike Inf. Crit.
|
1,211.891
|
|
Bayesian Inf. Crit.
|
1,232.944
|
|
|
|
Note:
|
p<0.1; p<0.05; p<0.01
|
rm(logit)
## Warning in rm(logit): object 'logit' not found
logitRandomIntercept = glmer(correct ~ overConstrained + phaseT + (1|pID), family=binomial(link="logit"), data=dataInput)
title="Logistic regressions"
tableName="decB01r2"
modelTableStyle(logitRandomIntercept,title,outputType,tableName)
[1] “decB01r2”
|
|
|
|
Logistic regressions
|
|
|
|
|
|
correct
|
|
|
Random Intercept
|
|
|
|
overConstrained
|
0.250
|
|
|
(0.271)
|
|
|
p = 0.355
|
|
|
|
|
phaseT
|
-1.208***
|
|
|
(0.202)
|
|
|
p = 0.000
|
|
|
|
|
Constant
|
2.333***
|
|
|
(0.206)
|
|
|
p = 0.000
|
|
|
|
|
|
|
Observations
|
1,427
|
|
Log Likelihood
|
-601.946
|
|
Akaike Inf. Crit.
|
1,211.891
|
|
Bayesian Inf. Crit.
|
1,232.944
|
|
|
|
Note:
|
p<0.1; p<0.05; p<0.01
|
rm(logit)
## Warning in rm(logit): object 'logit' not found
Number of Solutions that satisfy the Constraints and Accuracy (Computational Performance)
dataInput=dataDecProp
# Adds the number of solutions satisfying the constraints for each instance
dataInput=nSolutions(dataInput)
#Plots nSolutions (x) vs. Accuracy (y)
# Averaged within participant first, then across participants per nSolutions level
dataInput3 = dataInput %>% group_by(nSolutions,pID,phaseT)%>%summarise(accuracyMeans=mean(correct))%>%ungroup()%>%group_by(nSolutions,phaseT)%>%summarise(accuracy=mean(accuracyMeans),se=se(accuracyMeans))%>%ungroup()
# sol: whether the instance is solvable (at least one satisfying solution)
dataInput3$sol = dataInput3$nSolutions>=1
dataInput3$phaseT = recode(dataInput3$phaseT, '0' = "Low IC", '1' = "High IC")
plo= ggplot(data=dataInput3, aes(y=accuracy, x=as.factor(nSolutions))) +
geom_errorbar(aes(ymin=accuracy-se, ymax=accuracy+se), width=.1)+
geom_point(shape=23, size=3, fill="red")+
labs(title="Accuracy and the Number of Solutions",
x="Number of Solutions",y="Accuracy")+
coord_cartesian(ylim = c(0.5,1))+
theme_light()+
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
plot.title = element_text(hjust = 0.5),strip.text.x = element_blank())+
facet_grid(phaseT~sol, scales = "free_x", space = "free_x")+
geom_smooth(data=dataInput3, aes(y=accuracy, x=nSolutions),method=glm,se=FALSE,fullrange=TRUE,linetype = "dashed")
outputName = "decB02g"
plotExport(plo,outputName)
## [1] "decB02g"
## [1] "~/Google Drive/Melbourne/UNIMELB/Research/Complexity Project/KS-IC/Code/Behavioural-Analysis"

## quartz_off_screen
## 2
#Number of solutions of solvable solutions: Meand difference between In/Out of Phase T
library(pander)
dataInput2 = unique(dataInput %>% select(phaseT,id,nSolutions,sol) %>% filter(sol==1))
diffMeans = t.test(nSolutions ~ phaseT ,data=dataInput2)
pander(diffMeans)
Welch Two Sample t-test: nSolutions by phaseT (continued below)
| 8.428 |
26.36 |
5.879e-09 * * * |
two.sided |
#Includes a dummy if nSolutions==0, that's variable: sol.
logitRandomIntercept = glmer(correct ~ sol + phaseT + nSolutions + phaseT:nSolutions + (1|pID), family=binomial(link="logit"), data=dataInput)
title="Logistic regressions"
tableName='decB02r'
modelTableStyle(logitRandomIntercept,title,outputType,tableName)
[1] “decB02r”
|
|
|
|
Logistic regressions
|
|
|
|
|
|
correct
|
|
|
Random Intercept
|
|
|
|
sol
|
-1.427***
|
|
|
(0.243)
|
|
|
p = 0.000
|
|
|
|
|
phaseT
|
-1.339***
|
|
|
(0.225)
|
|
|
p = 0.000
|
|
|
|
|
nSolutions
|
0.139***
|
|
|
(0.041)
|
|
|
p = 0.001
|
|
|
|
|
phaseT:nSolutions
|
0.448***
|
|
|
(0.101)
|
|
|
p = 0.00001
|
|
|
|
|
Constant
|
2.627***
|
|
|
(0.220)
|
|
|
p = 0.000
|
|
|
|
|
|
|
Observations
|
1,427
|
|
Log Likelihood
|
-581.201
|
|
Akaike Inf. Crit.
|
1,174.402
|
|
Bayesian Inf. Crit.
|
1,205.982
|
|
|
|
Note:
|
p<0.1; p<0.05; p<0.01
|
#This version recodes phaseT to OutphaseT, to calculate the p-value of the numberOfSolutions beta(slope) In Phase Transition
dataInput$OutPhaseT = 1 - dataInput$phaseT
logitRandomIntercept2 = glmer(correct ~ sol + OutPhaseT + nSolutions + OutPhaseT:nSolutions + (1|pID), family=binomial(link="logit"), data=dataInput)
title="Logistic regressions"
tableName='decB02r_b'
modelTableStyle(logitRandomIntercept2,title,outputType,tableName)
[1] “decB02r_b”
|
|
|
|
Logistic regressions
|
|
|
|
|
|
correct
|
|
|
Random Intercept
|
|
|
|
sol
|
-1.427***
|
|
|
(0.243)
|
|
|
p = 0.000
|
|
|
|
|
OutPhaseT
|
1.339***
|
|
|
(0.225)
|
|
|
p = 0.000
|
|
|
|
|
nSolutions
|
0.586***
|
|
|
(0.112)
|
|
|
p = 0.00000
|
|
|
|
|
OutPhaseT:nSolutions
|
-0.448***
|
|
|
(0.101)
|
|
|
p = 0.00001
|
|
|
|
|
Constant
|
1.289***
|
|
|
(0.162)
|
|
|
p = 0.000
|
|
|
|
|
|
|
Observations
|
1,427
|
|
Log Likelihood
|
-581.201
|
|
Akaike Inf. Crit.
|
1,174.402
|
|
Bayesian Inf. Crit.
|
1,205.982
|
|
|
|
Note:
|
p<0.1; p<0.05; p<0.01
|
Instance Sampling Figure
#This shows the instances sampled for the decision task, together with the regions from which they were sampled
dataInput = dataDecProp
dataInput$nCapacity=dataInput$c/dataInput$totalWeights
dataInput$nProfit=dataInput$p /dataInput$totalValues
dataInput$lnVC=log(dataInput$nProfit/dataInput$nCapacity)
dataInput3 = dataInput %>% mutate(region=recode(type, `1`= 'Phase Transition',
`2`= 'Phase Transition', `3`= 'Phase Transition',
`4`='Phase Transition', '5'='Overconstrained',
'6'='Underconstrained'))
dataInput2 =dataInput3 %>% group_by(region) %>% summarise(minP=min(propagations),
medianP = median(propagations),
maxP =max(propagations),
mlnVC=mean(lnVC)) %>% arrange(desc(region))
phaseTransitionLevelLow=log(0.6/0.45)
phaseTransitionLevelHigh=log(0.65/0.4)
OUTphaseTransitionLevelLow=log(0.85/0.45)
OUTphaseTransitionLevelHigh=log(0.9/0.4)
OUT2phaseTransitionLevelLow=log(0.35/0.45)
OUT2phaseTransitionLevelHigh=log(0.4/0.4)
plo1 = ggplot(dataInput3,aes(x=lnVC,y=propagations,group=as.factor(region)))+
geom_crossbar(data=dataInput2,aes(ymin = minP, ymax = maxP, x = mlnVC, y = minP),
fill = "red",alpha=1)+
geom_crossbar(data=dataInput2,aes(ymin = minP, ymax = c(maxP[1],medianP[2],maxP[3]), x = mlnVC, y = minP, fill=as.factor(region)),alpha=1)+
scale_fill_manual(name ="Region",
breaks=c("Overconstrained", "Phase Transition", "Underconstrained"),
labels=c("Overconstrained", "Phase Transition", "Underconstrained"),
values=c("#FBB040","green","#FBB040"))+
geom_point(aes(shape=as.factor(sol)))+
scale_shape_manual(name ="Solution",
breaks=c("0", "1"),
labels=c("NO", "YES"),
values=c(6,4))+
theme_light()+
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),plot.title = element_text(hjust = 0.5))
plotExport(plo1,"decSampling")
## [1] "decSampling"
## [1] "~/Google Drive/Melbourne/UNIMELB/Research/Complexity Project/KS-IC/Code/Behavioural-Analysis"

## quartz_off_screen
## 2
Accuracy (Comp. Perf.) and continuous approximation to Instance Complexity (Symmetric)
#Plots ln (Normalosed Profit / Normalised capacity) against accuracy for the decision task
dataInput = dataDecProp
dataInput$nCapacity=dataInput$c/dataInput$totalWeights
dataInput$nProfit=dataInput$p /dataInput$totalValues
dataInput$lnVC=log(dataInput$nProfit/dataInput$nCapacity)
p_IC = 0.4
dataInput$d_lnVC=abs(dataInput$lnVC-p_IC)
dataInput$IC_cont = -pweibull(dataInput$d_lnVC, shape=2, scale=0.2)
plo= ggplot(dataInput,aes(x=d_lnVC,y=IC_cont))+
geom_point()
outputName='dec06g'
plotExport(plo,outputName)
## [1] "dec06g"
## [1] "~/Google Drive/Melbourne/UNIMELB/Research/Complexity Project/KS-IC/Code/Behavioural-Analysis"

## quartz_off_screen
## 2
#logistic with random effects (intercept)
logitRandomIntercept = glmer(correct ~ IC_cont + (1|pID), family=binomial(link="logit"), data=dataInput)
title="Logistic regressions"
tableName='dec06r'
modelTableStyle(logitRandomIntercept,title,outputType,tableName)
[1] “dec06r”
|
|
|
|
Logistic regressions
|
|
|
|
|
|
correct
|
|
|
Random Intercept
|
|
|
|
IC_cont
|
-1.463***
|
|
|
(0.176)
|
|
|
p = 0.000
|
|
|
|
|
Constant
|
1.057***
|
|
|
(0.133)
|
|
|
p = 0.000
|
|
|
|
|
|
|
Observations
|
1,427
|
|
Log Likelihood
|
-601.498
|
|
Akaike Inf. Crit.
|
1,208.997
|
|
Bayesian Inf. Crit.
|
1,224.787
|
|
|
|
Note:
|
p<0.1; p<0.05; p<0.01
|
Accuracy and continuous approximation to Instance Complexity (Non-Symmetric)
#Plots ln (Normalised Profit / Normalised capacity) against accuracy for the decision task
dataInput = dataDecProp
dataInput$nCapacity=dataInput$c/dataInput$totalWeights
dataInput$nProfit=dataInput$p /dataInput$totalValues
dataInput$lnVC=log(dataInput$nProfit/dataInput$nCapacity)
# Assumed centre of the phase transition on the lnVC axis — TODO confirm
p_IC = 0.4
# Signed distance from the assumed peak (not absolute, unlike the symmetric version)
dataInput$d_lnVC=dataInput$lnVC-p_IC
# Weibull shape/scale parameters for each side of the asymmetric complexity proxy
shape_r=2
shape_l=2
scale_r=0.2
scale_l=0.2
# Right of the peak: IC_cont decays from 1 towards 0 as distance grows
dataInput$IC_cont[dataInput$d_lnVC>=0] = -pweibull(dataInput$d_lnVC[dataInput$d_lnVC>=0], shape=shape_r, scale=scale_r)+1
# Left of the peak: same decay shape, but floored at min_l instead of 0
min_l=0.25
dataInput$IC_cont[dataInput$d_lnVC<0] = (-pweibull(-dataInput$d_lnVC[dataInput$d_lnVC<0], shape=shape_l, scale=scale_l)+1)*(1-min_l)+min_l
#-pweibull(-dataInput$d_lnVC[dataInput$d_lnVC<0], shape=shape_l, scale=scale_l)+1
# Boundaries (in lnVC) of the in- and out-of-phase-transition sampling bands
phaseTransitionLevelLow=log(0.6/0.45)
phaseTransitionLevelHigh=log(0.65/0.4)
OUTphaseTransitionLevelLow=log(0.85/0.45)
OUTphaseTransitionLevelHigh=log(0.9/0.4)
OUT2phaseTransitionLevelLow=log(0.35/0.45)
OUT2phaseTransitionLevelHigh=log(0.4/0.4)
plo = ggplot(dataInput,aes(x=lnVC,y=IC_cont))+
annotate("rect", xmin=phaseTransitionLevelLow,xmax=phaseTransitionLevelHigh,ymin=0,ymax=Inf, alpha=0.2, fill="red")+
annotate("rect", xmin=OUTphaseTransitionLevelLow,xmax=OUTphaseTransitionLevelHigh,ymin=0,ymax=Inf, alpha=0.2, fill="green")+
annotate("rect", xmin=OUT2phaseTransitionLevelLow,xmax=OUT2phaseTransitionLevelHigh,ymin=0,ymax=Inf, alpha=0.2, fill="green")+
geom_point()+
theme_light()+
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())
outputName='dec06gB'
plotExport(plo,outputName)
## [1] "dec06gB"
## [1] "~/Google Drive/Melbourne/UNIMELB/Research/Complexity Project/KS-IC/Code/Behavioural-Analysis"

## quartz_off_screen
## 2
#logistic with random effects (intercept)
logitRandomIntercept = glmer(correct ~ IC_cont + (1|pID), family=binomial(link="logit"), data=dataInput)
title="Logistic regressions"
tableName='dec06rB'
modelTableStyle(logitRandomIntercept,title,outputType,tableName)
[1] “dec06rB”
|
|
|
|
Logistic regressions
|
|
|
|
|
|
correct
|
|
|
Random Intercept
|
|
|
|
IC_cont
|
-1.701***
|
|
|
(0.203)
|
|
|
p = 0.000
|
|
|
|
|
Constant
|
2.758***
|
|
|
(0.193)
|
|
|
p = 0.000
|
|
|
|
|
|
|
Observations
|
1,427
|
|
Log Likelihood
|
-600.438
|
|
Akaike Inf. Crit.
|
1,206.876
|
|
Bayesian Inf. Crit.
|
1,222.666
|
|
|
|
Note:
|
p<0.1; p<0.05; p<0.01
|
Knapsack Optimisation Variant
dataAllOpt = importTrialInfo(folderDataOpt,"opt")
#Adds an experiment version column to the data based on "Participants Log.csv"
dataAllOpt=merge(dataAllOpt,partLog, by="pID" )
#Filters to get only the relevant versions
dataAllOpt=dplyr::filter(dataAllOpt,ExpVersion %in% versions)
#Imports the clicks data (participants' item selections)
dataAllClicks = importClicksInfo(folderDataOpt)
dataAllClicks=merge(dataAllClicks,partLog, by="pID")
dataAllClicks=dplyr::filter(dataAllClicks,ExpVersion %in% versions)
#Adds Phase Transition Dummy Variable to data (1-> in Phase Transition / 0 -> Out of Phase Transition)
dataAllOpt$phaseT=as.numeric(dataAllOpt$type<=4)
#Imports Algorithm-specific complexity measures (MZN propagations, SAT decisions and Sahni-k)
optInstanceInfo= read.csv(fileOptInstances, sep=",", stringsAsFactors = FALSE)
# Standardise the column names so they match the names used in the analyses below
names(optInstanceInfo)[names(optInstanceInfo)=="propagations_MZN"]="propagations"
names(optInstanceInfo)[names(optInstanceInfo)=="decisions_SAT"]="decisions"
names(optInstanceInfo)[names(optInstanceInfo)=="problem"]="id"
optInstanceInfo=optInstanceInfo[,c('id','propagations','decisions','sahniK')]
#Generates DataOptProp:= Optimisation Task data including ex-post complexity measures
dataOptProp=merge(dataAllOpt,optInstanceInfo, by="id")
#Adds a column with the number of clicks away from optimum for each trial
dataOptProp=addItemDistanceFromOpt(dataOptProp)
#Adds sum of weights and sum of values
dataOptProp=addSumOfValues(dataOptProp)
dataOptProp=addSumOfWeights(dataOptProp)
# Cleaning Optimisation Task Data:
# Filters out the cases in which participants spent less than 1 second in the task
dataOptProp1 = dataOptProp %>% filter(dataOptProp$timeSpent>=1)
numberOfDeletedTrials=dim(dataOptProp)[1]-dim(dataOptProp1)[1]
NOmitPartOpt= length(unique(dataOptProp %>% filter(dataOptProp$timeSpent<1) %>% pull(pID)))
print(paste("Number of Trials Deleted because participants spent less than 1 second in the task:",
numberOfDeletedTrials,"(from",NOmitPartOpt,"Participants)."))
## [1] "Number of Trials Deleted because participants spent less than 1 second in the task: 2 (from 2 Participants)."
dataOptProp = dataOptProp1
rm(dataOptProp1)
summaryListOpt= summaryData(dataOptProp)
# Omit those participants that never submitted their answer from the timeSpent analysis
#omitPID = c("be19","be31")
minTimeSpent = dataOptProp %>% group_by(pID) %>% summarise(minTime = min(timeSpent))
omitPID = minTimeSpent$pID[abs(optTaskMaxTime-minTimeSpent$minTime)<0.1] #Select those participants whose min time spent was optTaskMaxTime
dataOptTime=dataOptProp[!(dataOptProp$pID %in% omitPID),]
print(paste0("For time analysis (*effort*) ", length(omitPID)," participants that never submitted their answers were omited."))
## [1] "For time analysis (*effort*) 3 participants that never submitted their answers were omited."
Descriptive Summary
print(paste('Participants To be analysed are:',paste(unique(dataOptProp$pID),collapse=" ")))
## [1] "Participants To be analysed are: be30 be31 be32 be19 be15 be24 be26 be16 be25 be18 be33 be14 be29 be28 be17 be27 be23 be21 be22 be20"
print(paste('Number of participants to analyse :',length(unique(dataOptProp$pID)) ) )
## [1] "Number of participants to analyse : 20"
print(paste("The overall Accuracy was: ",mean(dataOptProp$correct)))
## [1] "The overall Accuracy was: 0.832402234636871"
#Summary Stats for Optimisation Problem
dataInput=dataOptProp
dataInput2= dataOptTime
accuracySummary = dataInput %>% group_by(pID) %>% summarise(acc=mean(correct)) %>% summarise(mean=mean(acc),min=min(acc),max=max(acc),SD=sd(acc))
kable(accuracySummary, digits=2, caption ="Accuracy Summary")
Accuracy Summary
| 0.83 |
0.67 |
0.94 |
0.08 |
timeSummary = dataInput2 %>% group_by(pID) %>% summarise(acc=mean(timeSpent)) %>% summarise(mean=mean(acc),min=min(acc),max=max(acc),SD=sd(acc))
kable(timeSummary, digits=2, caption ="Time spent Summary")
Time spent Summary
| 42.51 |
27.38 |
58.66 |
8.09 |
Effect of Trial number
Trial Number and Accuracy
#Trial (experience effect) effect on accuracy
dataInput=dataOptProp
dataInput$totalTrial = dataInput$block * dataInput$trial
summaryByBlock = dataInput %>% group_by(block,pID) %>% summarise(acc=mean(correct)) %>% summarise(mean=mean(acc),min=min(acc),max=max(acc),SD=sd(acc))
kable(summaryByBlock, digits=2, caption="Accuracy By Block")
Accuracy By Block
| 1 |
0.79 |
0.44 |
1 |
0.14 |
| 2 |
0.88 |
0.67 |
1 |
0.11 |
#Regression
logitRandomIntercept = glmer(correct ~ totalTrial+ (1|pID), family=binomial(link="logit"), data=dataInput)
title="Logistic regressions"
tableName='opt01r'
modelTableStyle(logitRandomIntercept,title,outputType,tableName)
[1] “opt01r”
|
|
|
|
Logistic regressions
|
|
|
|
|
|
correct
|
|
|
Random Intercept
|
|
|
|
totalTrial
|
0.012
|
|
|
(0.030)
|
|
|
p = 0.683
|
|
|
|
|
Constant
|
1.512***
|
|
|
(0.261)
|
|
|
p = 0.000
|
|
|
|
|
|
|
Observations
|
358
|
|
Log Likelihood
|
-161.752
|
|
Akaike Inf. Crit.
|
329.504
|
|
Bayesian Inf. Crit.
|
341.146
|
|
|
|
Note:
|
p<0.1; p<0.05; p<0.01
|
Trial Number and Time Spent
#Trial (experience effect) effect on timeSpent
dataInput=dataOptTime
dataInput$totalTrial = dataInput$block * dataInput$trial
summaryByBlock = dataInput %>% group_by(block,pID) %>% summarise(time=mean(timeSpent)) %>% summarise(mean=mean(time),min=min(time),max=max(time),SD=sd(time))
kable(summaryByBlock,digits=2 , caption="Time Spent per Trial segregated By Block")
Time Spent per Trial segregated By Block
| 1 |
42.81 |
31.11 |
57.33 |
7.60 |
| 2 |
42.22 |
23.65 |
60.00 |
9.07 |
#Regression
linearRandomIntercept = lmer(timeSpent ~ totalTrial+ (1|pID), data=dataInput)
title="Linear regressions"
tableName='opt02r'
modelTableStyle(linearRandomIntercept,title,outputType,tableName)
[1] “opt02r”
|
|
|
|
Linear regressions
|
|
|
|
|
|
timeSpent
|
|
|
Random Intercept
|
|
|
|
totalTrial
|
0.095
|
|
|
(0.135)
|
|
|
p = 0.483
|
|
|
|
|
Constant
|
41.802***
|
|
|
(2.210)
|
|
|
p = 0.000
|
|
|
|
|
|
|
Observations
|
305
|
|
Log Likelihood
|
-1,188.894
|
|
Akaike Inf. Crit.
|
2,385.788
|
|
Bayesian Inf. Crit.
|
2,400.670
|
|
|
|
Note:
|
p<0.1; p<0.05; p<0.01
|
In and Out of Phase Transition Contrast
Phase Transition and Accuracy (Computational Performance)
dataInput=dataOptProp %>% group_by(phaseT,pID)%>%summarise(accuracy1=mean(correct))%>%ungroup()%>%group_by(phaseT)%>%summarise(accuracy=mean(accuracy1),se=se(accuracy1))%>%ungroup()
dataInput$phaseT = recode(dataInput$phaseT, '0' = "Low IC", '1' = "High IC")
plo= ggplot(data=dataInput, aes(y=accuracy, x=as.factor(phaseT),label = round(accuracy,digits = 2))) +
geom_bar(stat = "identity")+
geom_errorbar(aes(ymin=accuracy-se, ymax=accuracy+se), width=.1)+
geom_signif(comparisons = list(c("Low IC","High IC")), annotations="***", y_position = 1.01, tip_length = 0.03)+
labs(title="Accuracy and Instance Complexity (Optimisation)",x="Instance complexity",y="Computational performance")+
coord_cartesian(ylim = c(0.5,1.03))+
geom_text(hjust=0.5,vjust = 5, colour="white") +
theme_light()+
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),plot.title = element_text(hjust = 0.5))
outputName = "opt03g"
plotExport(plo,outputName)
## [1] "opt03g"
## [1] "~/Google Drive/Melbourne/UNIMELB/Research/Complexity Project/KS-IC/Code/Behavioural-Analysis"

## quartz_off_screen
## 2
# Repeated-measures ANOVA: accuracy in vs. out of the phase transition.
anovaModel <- aov(correct ~ phaseT + Error(pID / phaseT), dataOptProp)
anovaPValue <- summary(anovaModel)[[2]][[1]][['Pr(>F)']][[1]]
print(paste("P-value for one way ANOVA:", signif(anovaPValue, digits = 3)))
## [1] "P-value for one way ANOVA: 2.21e-06"
rm(dataInput)
# Logistic mixed model with a participant-level random intercept
logitRandomIntercept <- glmer(correct ~ phaseT + (1 | pID),
                              family = binomial(link = "logit"),
                              data = dataOptProp)
title <- "Logistic regressions"
tableName <- 'opt03r'
modelTableStyle(logitRandomIntercept, title, outputType, tableName)
[1] “opt03r”
|
|
|
|
Logistic regressions
|
|
|
|
|
|
correct
|
|
|
Random Intercept
|
|
|
|
phaseT
|
-2.175***
|
|
|
(0.531)
|
|
|
p = 0.00005
|
|
|
|
|
Constant
|
3.359***
|
|
|
(0.509)
|
|
|
p = 0.000
|
|
|
|
|
|
|
Observations
|
358
|
|
Log Likelihood
|
-147.622
|
|
Akaike Inf. Crit.
|
301.245
|
|
Bayesian Inf. Crit.
|
312.887
|
|
|
|
Note:
|
p<0.1; p<0.05; p<0.01
|
Time Spent (Effort) In/Out Phase Transition
# Per-participant mean time in each IC condition, then condition means + SE.
dataInput <- dataOptTime %>%
  group_by(phaseT, pID) %>%
  summarise(timeSpentMeans = mean(timeSpent)) %>%
  ungroup() %>%
  group_by(phaseT) %>%
  summarise(time = mean(timeSpentMeans), se = se(timeSpentMeans)) %>%
  ungroup()
dataInput$phaseT <- recode(dataInput$phaseT, '0' = "Low IC", '1' = "High IC")
plo <- ggplot(data = dataInput,
              aes(y = time, x = as.factor(phaseT),
                  label = round(time, digits = 1))) +
  geom_bar(stat = "identity") +
  geom_errorbar(aes(ymin = time - se, ymax = time + se), width = .1) +
  geom_signif(comparisons = list(c("Low IC", "High IC")), annotations = "***",
              y_position = 50, tip_length = 0.03) +
  labs(title = "Time spent on an instance and Instance Complexity",
       x = "Instance complexity", y = "Average time spent on an instance") +
  geom_text(hjust = 0.5, vjust = 5, colour = "white") +
  theme_light() +
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        plot.title = element_text(hjust = 0.5)) +
  coord_cartesian(ylim = c(0, 52))
outputName <- "opt04g"
plotExport(plo, outputName)
## [1] "opt04g"
## [1] "~/Google Drive/Melbourne/UNIMELB/Research/Complexity Project/KS-IC/Code/Behavioural-Analysis"

## quartz_off_screen
## 2
#Stats test for time in and out of phase transition: ANOVA
anovaModel=aov(timeSpent~phaseT+Error(pID/phaseT),dataOptTime)
anovaPValue=summary(anovaModel)[[2]][[1]][['Pr(>F)']][[1]]
print(paste("P-value for one way ANOVA:",signif(anovaPValue,digits=3)))
## [1] "P-value for one way ANOVA: 3.39e-07"
rm(dataInput)
#Linear mixed model with random effects (intercept).
#NOTE(review): the original comment said "logistic", but lmer() fits a linear
#model on the continuous outcome timeSpent -- the label was wrong, not the code.
linearMRandomIntercept = lmer(timeSpent ~ phaseT + (1|pID), data=dataOptTime)
title="Linear regressions"
tableName='opt04r'
modelTableStyle(linearMRandomIntercept,title,outputType,tableName)
[1] “opt04r”
|
|
|
|
Linear regressions
|
|
|
|
|
|
timeSpent
|
|
|
Random Intercept
|
|
|
|
phaseT
|
10.114***
|
|
|
(1.236)
|
|
|
p = 0.000
|
|
|
|
|
Constant
|
35.749***
|
|
|
(2.132)
|
|
|
p = 0.000
|
|
|
|
|
|
|
Observations
|
305
|
|
Log Likelihood
|
-1,156.717
|
|
Akaike Inf. Crit.
|
2,321.433
|
|
Bayesian Inf. Crit.
|
2,336.314
|
|
|
|
Note:
|
p<0.1; p<0.05; p<0.01
|
Effort and Phase Transition interaction effect on Accuracy
MZN Propagations
MZN Propagations and Time Spent
#Average per propagations plotted
# Mean time spent per MZN-propagation level, scatter with a GLM smoother.
groups5=dataOptTime %>% group_by(propagations)
# NOTE(review): se(propagations) computes the SE of the grouping variable
# itself (0, or NA for singleton groups) -- probably se(timeSpent) was
# intended. The `se` column is not used by the plot below, so the figure
# is unaffected; confirm before reusing `dat5$se`.
dat5=summarise(groups5, time=mean(timeSpent), se=se(propagations))
plo = ggplot(data=dat5, aes(y=time, x=propagations)) +
geom_point()+
labs(title="Time Spent and MZN Propagations",y="Time spent on an instance",x="MZN Complexity Measure (Propagations)")+
theme(plot.title = element_text(hjust = 0.5)) +
stat_smooth(method = glm)+
geom_hline(aes(yintercept=0.5), colour="#990000", linetype="dashed")
outputName = "opt06g2"
plotExport(plo,outputName)
## [1] "opt06g2"
## [1] "~/Google Drive/Melbourne/UNIMELB/Research/Complexity Project/KS-IC/Code/Behavioural-Analysis"

## quartz_off_screen
## 2
# Linear mixed model: time spent as a function of MZN propagations.
linearMRandomIntercept <- lmer(timeSpent ~ propagations + (1 | pID),
                               data = dataOptTime)
title <- "Linear regressions"
tableName <- 'opt06r2'
modelTableStyle(linearMRandomIntercept, title, outputType, tableName)
[1] “opt06r2”
|
|
|
|
Linear regressions
|
|
|
|
|
|
timeSpent
|
|
|
Random Intercept
|
|
|
|
propagations
|
0.438***
|
|
|
(0.100)
|
|
|
p = 0.00002
|
|
|
|
|
Constant
|
32.505***
|
|
|
(3.013)
|
|
|
p = 0.000
|
|
|
|
|
|
|
Observations
|
305
|
|
Log Likelihood
|
-1,180.119
|
|
Akaike Inf. Crit.
|
2,368.238
|
|
Bayesian Inf. Crit.
|
2,383.119
|
|
|
|
Note:
|
p<0.1; p<0.05; p<0.01
|
SAT Decisions
SAT Decisions and Time Spent
# Mean time spent per SAT-decisions level, scatter with a GLM smoother.
groups5=dataOptTime %>% group_by(decisions)
# NOTE(review): se(decisions) is the SE of the grouping variable itself
# (0, or NA for singleton groups) -- likely se(timeSpent) was intended.
# The `se` column is unused by the plot, so the figure is unaffected.
dat5=summarise(groups5, time=mean(timeSpent), se=se(decisions))
plo = ggplot(data=dat5, aes(y=time, x=decisions)) +
geom_point()+
labs(title="Time Spent and SAT decisions",y="Time spent on an instance",x="SAT Complexity Measure (Decisions)")+
theme(plot.title = element_text(hjust = 0.5)) +
stat_smooth(method = glm)+
geom_hline(aes(yintercept=0.5), colour="#990000", linetype="dashed")
outputName = "opt08g"
plotExport(plo,outputName)
## [1] "opt08g"
## [1] "~/Google Drive/Melbourne/UNIMELB/Research/Complexity Project/KS-IC/Code/Behavioural-Analysis"

## quartz_off_screen
## 2
# Linear mixed model: time spent as a function of SAT decisions.
linearMRandomIntercept <- lmer(timeSpent ~ decisions + (1 | pID),
                               data = dataOptTime)
title <- "Linear regressions"
tableName <- 'opt08r'
modelTableStyle(linearMRandomIntercept, title, outputType, tableName)
[1] “opt08r”
|
|
|
|
Linear regressions
|
|
|
|
|
|
timeSpent
|
|
|
Random Intercept
|
|
|
|
decisions
|
0.276***
|
|
|
(0.080)
|
|
|
p = 0.001
|
|
|
|
|
Constant
|
37.165***
|
|
|
(2.503)
|
|
|
p = 0.000
|
|
|
|
|
|
|
Observations
|
305
|
|
Log Likelihood
|
-1,183.824
|
|
Akaike Inf. Crit.
|
2,375.647
|
|
Bayesian Inf. Crit.
|
2,390.529
|
|
|
|
Note:
|
p<0.1; p<0.05; p<0.01
|
Sahni-K Analysis
Sahni-K and Accuracy (Computational Performance)
# Per-participant accuracy at each Sahni-k level, then group means + SE.
dataInput <- dataOptProp %>%
  group_by(sahniK, pID) %>%
  summarise(accuracy1 = mean(correct)) %>%
  ungroup() %>%
  group_by(sahniK) %>%
  summarise(accuracy = mean(accuracy1), se = se(accuracy1)) %>%
  ungroup()
plo <- ggplot(data = dataInput,
              aes(y = accuracy, x = as.factor(sahniK),
                  label = round(accuracy, digits = 2))) +
  geom_bar(stat = "identity") +
  geom_errorbar(aes(ymin = accuracy - se, ymax = accuracy + se), width = .1) +
  geom_signif(comparisons = list(c("0", "1")), annotations = "***",
              y_position = 1, tip_length = 0.03) +
  geom_signif(comparisons = list(c("1", "2")), annotations = "*",
              y_position = 0.8, tip_length = 0.03) +
  labs(title = "Computational Performance and sahni-K complexity",
       x = "Sahni-K Complexity", y = "Computational Performance") +
  coord_cartesian(ylim = c(0.3, 1.03)) +
  geom_text(hjust = 0.5, vjust = 4.5, colour = "white") +
  theme_light() +
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        plot.title = element_text(hjust = 0.5)) +
  geom_hline(aes(yintercept = 0.5), linetype = "dotted")
outputName <- "opt09g"
plotExport(plo, outputName)
## [1] "opt09g"
## [1] "~/Google Drive/Melbourne/UNIMELB/Research/Complexity Project/KS-IC/Code/Behavioural-Analysis"

## quartz_off_screen
## 2
#Sahni-K boxplot: I think it says quite a bit
# dataInput=dataOptProp %>% group_by(sahniK,pID)%>%summarise(accuracy=mean(correct))%>%ungroup()
#
# ggplot(dataInput, aes(x=as.factor(sahniK), y=accuracy))+
# geom_boxplot(outlier.shape=1)+
# #geom_boxplot( aes(x=as.logical(phaseT), y=accuracy),outlier.shape=1) +
# stat_summary(fun.y=mean, colour="darkred", geom="point", size=2, shape=17) +
# stat_summary(fun.y=mean, colour="darkred", geom="text", size=3, aes( label=round(..y.., digits=2)),hjust=1.5,vjust=0.5) +
# #coord_cartesian(ylim = c(0,1)) +
# labs(title="Accuracy and Sahni-K",x="Sahni-K",y="Percentage")+
# theme(plot.title = element_text(hjust = 0.5))+#, axis.text.x = element_text(angle=45,hjust=1))+
# geom_hline(aes(yintercept=0.5), colour="#990000", linetype="dashed")
# Logistic mixed model: accuracy as a function of Sahni-k complexity.
logitRandomIntercept <- glmer(correct ~ sahniK + (1 | pID),
                              family = binomial(link = "logit"),
                              data = dataOptProp)
title <- "Logistic regressions"
tableName <- 'opt09r'
modelTableStyle(logitRandomIntercept, title, outputType, tableName)
[1] “opt09r”
|
|
|
|
Logistic regressions
|
|
|
|
|
|
correct
|
|
|
Random Intercept
|
|
|
|
sahniK
|
-1.333***
|
|
|
(0.193)
|
|
|
p = 0.000
|
|
|
|
|
Constant
|
2.310***
|
|
|
(0.219)
|
|
|
p = 0.000
|
|
|
|
|
|
|
Observations
|
358
|
|
Log Likelihood
|
-135.816
|
|
Akaike Inf. Crit.
|
277.633
|
|
Bayesian Inf. Crit.
|
289.274
|
|
|
|
Note:
|
p<0.1; p<0.05; p<0.01
|
# Logistic corroborations of the pairwise significance shown in the
# accuracy/Sahni-k plot, via dummy variables for each Sahni-k level.
dataInput <- dataOptProp
dataInput$sahniK0 <- (dataInput$sahniK == 0)
dataInput$sahniK1 <- (dataInput$sahniK == 1)
dataInput$sahniK2 <- (dataInput$sahniK == 2)
#logitRandomIntercept0 = glmer(correct ~ sahniK1 + sahniK2 + (1|pID), family=binomial(link="logit"), data=dataInput)
logitRandomIntercept1 <- glmer(correct ~ sahniK0 + sahniK2 + (1 | pID),
                               family = binomial(link = "logit"),
                               data = dataInput)
title <- "Logistic regressions"
tableName <- 'opt09r1'
modelTableStyle(logitRandomIntercept1, title, outputType, tableName)
[1] “opt09r1”
|
|
|
|
Logistic regressions
|
|
|
|
|
|
correct
|
|
|
Random Intercept
|
|
|
|
sahniK0
|
1.751***
|
|
|
(0.398)
|
|
|
p = 0.00002
|
|
|
|
|
sahniK2
|
-0.828*
|
|
|
(0.463)
|
|
|
p = 0.074
|
|
|
|
|
Constant
|
0.625*
|
|
|
(0.337)
|
|
|
p = 0.064
|
|
|
|
|
|
|
Observations
|
358
|
|
Log Likelihood
|
-135.116
|
|
Akaike Inf. Crit.
|
278.233
|
|
Bayesian Inf. Crit.
|
293.755
|
|
|
|
Note:
|
p<0.1; p<0.05; p<0.01
|
Sahni-K and Time Spent
# Per-participant mean time at each Sahni-k level, then group means + SE.
dataInput <- dataOptTime %>%
  group_by(sahniK, pID) %>%
  summarise(timeSpentMeans = mean(timeSpent)) %>%
  ungroup() %>%
  group_by(sahniK) %>%
  summarise(time = mean(timeSpentMeans), se = se(timeSpentMeans)) %>%
  ungroup()
plo <- ggplot(data = dataInput,
              aes(y = time, x = as.factor(sahniK),
                  label = round(time, digits = 1))) +
  geom_bar(stat = "identity") +
  geom_errorbar(aes(ymin = time - se, ymax = time + se), width = .1) +
  geom_signif(comparisons = list(c("1", "2")), annotations = "**",
              y_position = 52, tip_length = 0.03) +
  geom_signif(comparisons = list(c("0", "2")), annotations = "***",
              y_position = 56, tip_length = 0.03) +
  geom_signif(comparisons = list(c("0", "1")), annotations = "NS",
              y_position = 48, tip_length = 0.03) +
  labs(title = "Time spent on an instance and sahni-K complexity",
       x = "Sahni-K Complexity", y = "Average time spent on an instance") +
  coord_cartesian(ylim = c(0, 57)) +
  geom_text(hjust = 0.5, vjust = 5, colour = "white") +
  theme_light() +
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        plot.title = element_text(hjust = 0.5))
outputName <- "opt10g"
plotExport(plo, outputName)
## [1] "opt10g"
## [1] "~/Google Drive/Melbourne/UNIMELB/Research/Complexity Project/KS-IC/Code/Behavioural-Analysis"

## quartz_off_screen
## 2
# Linear mixed model: time spent as a function of Sahni-k complexity.
linearMRandomIntercept <- lmer(timeSpent ~ sahniK + (1 | pID),
                               data = dataOptTime)
title <- "Linear regressions"
tableName <- 'opt10r'
modelTableStyle(linearMRandomIntercept, title, outputType, tableName)
[1] “opt10r”
|
|
|
|
Linear regressions
|
|
|
|
|
|
timeSpent
|
|
|
Random Intercept
|
|
|
|
sahniK
|
3.125***
|
|
|
(0.950)
|
|
|
p = 0.001
|
|
|
|
|
Constant
|
41.469***
|
|
|
(1.990)
|
|
|
p = 0.000
|
|
|
|
|
|
|
Observations
|
305
|
|
Log Likelihood
|
-1,181.856
|
|
Akaike Inf. Crit.
|
2,371.712
|
|
Bayesian Inf. Crit.
|
2,386.593
|
|
|
|
Note:
|
p<0.1; p<0.05; p<0.01
|
# Linear (not "logistic" as the original comment said) corroborations of the
# pairwise significance pattern in the time/Sahni-k plot, via dummy-coded
# Sahni-k levels.
# NOTE(review): these timeSpent models are fit on dataOptProp, while every
# other timeSpent model in this document uses dataOptTime (hence 358 vs 305
# observations in the rendered tables) -- confirm the intended data set.
dataInput <- dataOptProp
dataInput$sahniK0 <- (dataInput$sahniK == 0)
dataInput$sahniK1 <- (dataInput$sahniK == 1)
dataInput$sahniK2 <- (dataInput$sahniK == 2)
linearRandomIntercept0 <- lmer(timeSpent ~ sahniK1 + sahniK2 + (1 | pID), data = dataInput)
linearRandomIntercept1 <- lmer(timeSpent ~ sahniK0 + sahniK2 + (1 | pID), data = dataInput)
title <- "Linear regressions"
tableName <- 'opt09r2a'
# Bug fix: the original passed list(linearRandomIntercept0, linearRandomIntercept0),
# rendering the SAME model in two identical columns; one model is enough.
modelTableStyle(linearRandomIntercept0, title, outputType, tableName)
[1] “opt09r2a”
|
|
|
|
Linear regressions
|
|
|
|
|
|
timeSpent
|
|
|
Random Intercept
|
|
|
|
(1)
|
(2)
|
|
|
|
sahniK1
|
0.928
|
0.928
|
|
|
(1.732)
|
(1.732)
|
|
|
p = 0.593
|
p = 0.593
|
|
|
|
|
|
sahniK2
|
6.011***
|
6.011***
|
|
|
(1.732)
|
(1.732)
|
|
|
p = 0.001
|
p = 0.001
|
|
|
|
|
|
Constant
|
44.360***
|
44.360***
|
|
|
(2.212)
|
(2.212)
|
|
|
p = 0.000
|
p = 0.000
|
|
|
|
|
|
|
|
Observations
|
358
|
358
|
|
Log Likelihood
|
-1,362.686
|
-1,362.686
|
|
Akaike Inf. Crit.
|
2,735.371
|
2,735.371
|
|
Bayesian Inf. Crit.
|
2,754.774
|
2,754.774
|
|
|
|
Note:
|
p<0.1; p<0.05; p<0.01
|
tableName <- 'opt09r2b'
# Bug fix: the original passed list(linearRandomIntercept1, linearRandomIntercept1),
# rendering the SAME model in two identical columns; one model is enough.
modelTableStyle(linearRandomIntercept1, title, outputType, tableName)
[1] “opt09r2b”
|
|
|
|
Linear regressions
|
|
|
|
|
|
timeSpent
|
|
|
Random Intercept
|
|
|
|
(1)
|
(2)
|
|
|
|
sahniK0
|
-0.928
|
-0.928
|
|
|
(1.732)
|
(1.732)
|
|
|
p = 0.593
|
p = 0.593
|
|
|
|
|
|
sahniK2
|
5.084**
|
5.084**
|
|
|
(2.290)
|
(2.290)
|
|
|
p = 0.027
|
p = 0.027
|
|
|
|
|
|
Constant
|
45.288***
|
45.288***
|
|
|
(2.672)
|
(2.672)
|
|
|
p = 0.000
|
p = 0.000
|
|
|
|
|
|
|
|
Observations
|
358
|
358
|
|
Log Likelihood
|
-1,362.686
|
-1,362.686
|
|
Akaike Inf. Crit.
|
2,735.371
|
2,735.371
|
|
Bayesian Inf. Crit.
|
2,754.774
|
2,754.774
|
|
|
|
Note:
|
p<0.1; p<0.05; p<0.01
|
Sahni-K, IC and Effort
Is effort (time spent) modulated by Sahni-K or IC?
# Does Sahni-k modulate effort beyond the phase-transition effect?
# sahniK * phaseT expands to sahniK + phaseT + sahniK:phaseT (same model).
linearRandomIntercept <- lmer(timeSpent ~ sahniK * phaseT + (1 | pID),
                              data = dataOptTime)
title <- "Linear regressions"
tableName <- 'opt11r'
modelTableStyle(linearRandomIntercept, title, outputType, tableName)
[1] “opt11r”
|
|
|
|
Linear regressions
|
|
|
|
|
|
timeSpent
|
|
|
Random Intercept
|
|
|
|
sahniK
|
1.489
|
|
|
(2.687)
|
|
|
p = 0.580
|
|
|
|
|
phaseT
|
9.535***
|
|
|
(1.366)
|
|
|
p = 0.000
|
|
|
|
|
sahniK:phaseT
|
0.502
|
|
|
(2.844)
|
|
|
p = 0.861
|
|
|
|
|
Constant
|
35.498***
|
|
|
(2.178)
|
|
|
p = 0.000
|
|
|
|
|
|
|
Observations
|
305
|
|
Log Likelihood
|
-1,151.532
|
|
Akaike Inf. Crit.
|
2,315.065
|
|
Bayesian Inf. Crit.
|
2,337.386
|
|
|
|
Note:
|
p<0.1; p<0.05; p<0.01
|
Time Spent and Accuracy (Computational Performance)
# Mean time spent on correct vs. incorrect instances (per-participant means,
# then between-subject SE).
dataInput <- dataOptTime %>%
  group_by(correct, pID) %>%
  summarise(timeSpentMeans = mean(timeSpent)) %>%
  ungroup() %>%
  group_by(correct) %>%
  summarise(time = mean(timeSpentMeans), se = se(timeSpentMeans)) %>%
  ungroup()
plo <- ggplot(data = dataInput,
              aes(y = time, x = as.factor(as.logical(correct)),
                  label = round(time, digits = 2), group = 1)) +
  geom_line() +
  geom_errorbar(aes(ymin = time - se, ymax = time + se), width = .1) +
  geom_point(shape = 21, size = 3, fill = "white") +
  labs(title = "Time Spent on Correct/Incorrect instances",
       x = "Reached Optimum", y = "Time Spent") +
  theme(plot.title = element_text(hjust = 0.5),
        axis.text.x = element_text(angle = 0, hjust = 0.5)) +
  geom_text(hjust = -0.5, colour = "black")
outputName <- "opt15g"
plotExport(plo, outputName)
## [1] "opt15g"
## [1] "~/Google Drive/Melbourne/UNIMELB/Research/Complexity Project/KS-IC/Code/Behavioural-Analysis"

## quartz_off_screen
## 2
# Repeated-measures ANOVA: time spent for correct vs. incorrect answers.
anovaModel <- aov(timeSpent ~ correct + Error(pID / correct), dataOptTime)
anovaPValue <- summary(anovaModel)[[2]][[1]][['Pr(>F)']][[1]]
print(paste("P-value for one way ANOVA:", signif(anovaPValue, digits = 3)))
## [1] "P-value for one way ANOVA: 1.27e-05"
rm(dataInput)
# Logistic mixed model: does time spent predict reaching the optimum?
logitRandomIntercept <- glmer(correct ~ timeSpent + (1 | pID),
                              family = binomial(link = "logit"),
                              data = dataOptTime)
title <- "Logistic regressions"
tableName <- 'opt15r'
modelTableStyle(logitRandomIntercept, title, outputType, tableName)
[1] “opt15r”
|
|
|
|
Logistic regressions
|
|
|
|
|
|
correct
|
|
|
Random Intercept
|
|
|
|
timeSpent
|
-0.072***
|
|
|
(0.017)
|
|
|
p = 0.00003
|
|
|
|
|
Constant
|
4.975***
|
|
|
(0.865)
|
|
|
p = 0.000
|
|
|
|
|
|
|
Observations
|
305
|
|
Log Likelihood
|
-124.754
|
|
Akaike Inf. Crit.
|
255.509
|
|
Bayesian Inf. Crit.
|
266.670
|
|
|
|
Note:
|
p<0.1; p<0.05; p<0.01
|
Decision Vs. Optimisation
# Rename the auto-generated accuracy columns, merge both task summaries by
# participant, and test their Pearson correlation.
names(summaryListDec[[2]])[names(summaryListDec[[2]]) == 'mean(correct)'] <- 'AccuracyDecision'
names(summaryListOpt[[2]])[names(summaryListOpt[[2]]) == 'mean(correct)'] <- 'AccuracyOptimisation'
varMerge <- merge(summaryListDec[[2]], summaryListOpt[[2]], by = "pID")
#varMerge = varMerge %>% filter(pID != "be16")#Outlier
cortest <- cor.test(varMerge$AccuracyDecision, varMerge$AccuracyOptimisation,
                    method = "pearson") #spearman is non-parametric
print(cortest)
##
## Pearson's product-moment correlation
##
## data: varMerge$AccuracyDecision and varMerge$AccuracyOptimisation
## t = 2.4059, df = 18, p-value = 0.02709
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.06494383 0.76813291
## sample estimates:
## cor
## 0.493288
# Scatter of per-participant accuracy in the two knapsack variants.
# Fix: aes_string() is deprecated in ggplot2 (>= 3.4); the column names here
# are literal, so plain aes() produces the identical mapping.
plo <- ggplot(varMerge, aes(x = AccuracyDecision, y = AccuracyOptimisation)) +
  geom_point(shape = 23, size = 3, fill = "red") +
  geom_smooth(method = "lm", se = FALSE, linetype = "dashed") + #GLM or LM(SAT...)
  labs(x = "Knapsack Accuracy: Decision",
       y = "Knapsack Accuracy: Optimisation") +
  theme_light() +
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        plot.title = element_text(hjust = 0.5))
outputName <- "decOpt01g"
plotExport(plo, outputName)
## [1] "decOpt01g"
## [1] "~/Google Drive/Melbourne/UNIMELB/Research/Complexity Project/KS-IC/Code/Behavioural-Analysis"

## quartz_off_screen
## 2
Appendix Test Corroborations
Controlling for effort, is Phase Transition still significant in the optimisation task?
#Keeping timeSpent constant: is the phase-transition effect on accuracy
#still significant after controlling for effort?
logitRandomIntercept <- glmer(correct ~ phaseT + timeSpent + (1 | pID),
                              family = binomial(link = "logit"),
                              data = dataOptTime)
title <- "Logistic regressions"
tableName <- 'app01r'
modelTableStyle(logitRandomIntercept, title, outputType, tableName)
[1] “app01r”
|
|
|
|
Logistic regressions
|
|
|
|
|
|
correct
|
|
|
Random Intercept
|
|
|
|
phaseT
|
-2.359***
|
|
|
(0.743)
|
|
|
p = 0.002
|
|
|
|
|
timeSpent
|
-0.055***
|
|
|
(0.017)
|
|
|
p = 0.002
|
|
|
|
|
Constant
|
6.137***
|
|
|
(1.062)
|
|
|
p = 0.000
|
|
|
|
|
|
|
Observations
|
305
|
|
Log Likelihood
|
-115.648
|
|
Akaike Inf. Crit.
|
239.296
|
|
Bayesian Inf. Crit.
|
254.177
|
|
|
|
Note:
|
p<0.1; p<0.05; p<0.01
|
Solvability and accuracy (Computational Performance) for KS Decision Task
# Accuracy by instance solvability in the decision task (per-participant
# means, then between-subject SE).
dataInput <- dataDecProp %>%
  group_by(sol, pID) %>%
  summarise(accuracy1 = mean(correct)) %>%
  ungroup() %>%
  group_by(sol) %>%
  summarise(accuracy = mean(accuracy1), se = se(accuracy1)) %>%
  ungroup()
plo <- ggplot(data = dataInput,
              aes(y = accuracy, x = as.factor(as.logical(sol)),
                  label = round(accuracy, digits = 2), group = 1)) +
  geom_line() +
  geom_errorbar(aes(ymin = accuracy - se, ymax = accuracy + se), width = .1) +
  geom_point(shape = 21, size = 3, fill = "white") +
  labs(title = "Accuracy depending on solvability",
       x = "Does the instance have a solution?", y = "Accuracy") +
  theme(plot.title = element_text(hjust = 0.5),
        axis.text.x = element_text(angle = 0, hjust = 0.5)) +
  geom_text(hjust = -0.5, colour = "black")
outputName <- "app02g"
plotExport(plo, outputName)
## [1] "app02g"
## [1] "~/Google Drive/Melbourne/UNIMELB/Research/Complexity Project/KS-IC/Code/Behavioural-Analysis"

## quartz_off_screen
## 2
# Repeated-measures ANOVA: accuracy by instance solvability.
anovaModel=aov(correct~sol+Error(pID/sol),dataDecProp)
anovaPValue=summary(anovaModel)[[2]][[1]][['Pr(>F)']][[1]]
print(paste("P-value for one way ANOVA:",signif(anovaPValue,digits=3)))
[1] “P-value for one way ANOVA: 0.163”
rm(dataInput)
#logistic with random effects (intercept)
# NOTE(review): the ANOVA above tests solvability (sol), but this regression
# uses the participant's response (answer) as the predictor -- confirm
# whether `sol` was intended here to match the section heading.
logitRandomIntercept = glmer(correct ~ answer + (1|pID), family=binomial(link="logit"), data=dataDecProp)
title="Logistic regressions"
tableName='app02r'
modelTableStyle(logitRandomIntercept,title,outputType,tableName)
[1] “app02r”
|
|
|
|
Logistic regressions
|
|
|
|
|
|
correct
|
|
|
Random Intercept
|
|
|
|
answer
|
0.168
|
|
|
(0.144)
|
|
|
p = 0.245
|
|
|
|
|
Constant
|
1.565***
|
|
|
(0.131)
|
|
|
p = 0.000
|
|
|
|
|
|
|
Observations
|
1,427
|
|
Log Likelihood
|
-639.750
|
|
Akaike Inf. Crit.
|
1,285.500
|
|
Bayesian Inf. Crit.
|
1,301.290
|
|
|
|
Note:
|
p<0.1; p<0.05; p<0.01
|
Do people still modulate their effort when they don’t find the solution in the optimisation task?
linearRandomIntercept = lmer(timeSpent ~ phaseT + correct + phaseT:correct + (1|pID), data=dataOptTime)
title="Linear regressions"
tableName='app03r'
modelTableStyle(linearRandomIntercept,title,outputType,tableName)
[1] “app03r”
|
|
|
|
Linear regressions
|
|
|
|
|
|
timeSpent
|
|
|
Random Intercept
|
|
|
|
phaseT
|
15.161**
|
|
|
(7.261)
|
|
|
p = 0.037
|
|
|
|
|
correct
|
-0.489
|
|
|
(7.210)
|
|
|
p = 0.946
|
|
|
|
|
phaseT:correct
|
-6.784
|
|
|
(7.374)
|
|
|
p = 0.358
|
|
|
|
|
Constant
|
36.227***
|
|
|
(7.379)
|
|
|
p = 0.00000
|
|
|
|
|
|
|
Observations
|
305
|
|
Log Likelihood
|
-1,142.942
|
|
Akaike Inf. Crit.
|
2,297.885
|
|
Bayesian Inf. Crit.
|
2,320.207
|
|
|
|
Note:
|
p<0.1; p<0.05; p<0.01
|
Arithmetic Test
First 3 trials were regarded as practice trials.
# Import Math Data (Mental Arithmetic)
dataMath=importMathInfo(folderDataMath)
dataMath=merge(dataMath,partLog, by="pID" )
# Filter defined versions
dataMath=dplyr::filter(dataMath,ExpVersion %in% versions)
#Generate Correct/Incorrect Variable
dataMath$correct= (dataMath$answer == dataMath$solution)
# Unanswered trials (NA comparison) are scored as incorrect
dataMath$correct[is.na(dataMath$correct)]=FALSE
#Filter First 3 trials of task; they were considered practice trials
dataMath=dataMath %>% filter( trial>= 4 | block != 1 )
# Per-participant means; summarise() auto-names columns like "mean(correct)"
summaryMath=dataMath %>% group_by(pID) %>% summarise(mean(correct), mean(timeSpent), mean(submitted))
# Rename the auto-generated accuracy column to a readable name
names(summaryMath)['mean(correct)'==names(summaryMath)]='AccuracyMath'
print(paste('Number of participants to analyse:',length(unique(dataMath$pID)) ) )
## [1] "Number of participants to analyse: 20"
print(paste('Participants To be analysed are:',paste(unique(dataMath$pID),collapse=" ")))
## [1] "Participants To be analysed are: be14 be15 be16 be17 be18 be19 be20 be21 be22 be23 be24 be25 be26 be27 be28 be29 be30 be31 be32 be33"
print(paste("The overall MATH accuracy is",mean(dataMath$correct)))
## [1] "The overall MATH accuracy is 0.628333333333333"
Accuracy in Decision Knapsack problem and Mental Arithmetic
# Correlate per-participant decision-KS accuracy with mental-arithmetic
# accuracy (scatter + linear fit, then Pearson test).
names(summaryListDec[[2]])[names(summaryListDec[[2]]) == 'AccuracyDecision'] <- 'AccuracyKnapsack'
mathMerge <- merge(summaryListDec[[2]], summaryMath, by = "pID")
#mathMerge = mathMerge %>% filter(pID != "be16")
plo <- ggplot(mathMerge, aes(x = AccuracyKnapsack, y = AccuracyMath)) +
  geom_point() +
  geom_smooth(method = "lm") #GLM or LM(SAT...)
outputName <- "math01g"
plotExport(plo, outputName)
## [1] "math01g"
## [1] "~/Google Drive/Melbourne/UNIMELB/Research/Complexity Project/KS-IC/Code/Behavioural-Analysis"

## quartz_off_screen
## 2
mathDec <- cor.test(mathMerge$AccuracyKnapsack, mathMerge$AccuracyMath,
                    method = "pearson") #spearman is non-parametric vs. pearson which is parametric (linear)
Accuracy in Optimisation Knapsack problem and Mental Arithmetic
#Accuracy of Knapsack vs. Math Task Accuracy
# Same analysis for the optimisation variant.
names(summaryListOpt[[2]])[names(summaryListOpt[[2]]) == 'AccuracyOptimisation'] <- 'AccuracyKnapsack'
mathMerge <- merge(summaryListOpt[[2]], summaryMath, by = "pID")
plo <- ggplot(mathMerge, aes(x = AccuracyKnapsack, y = AccuracyMath)) +
  geom_point() +
  geom_smooth(method = "lm")
outputName <- "math02g"
plotExport(plo, outputName)
## [1] "math02g"
## [1] "~/Google Drive/Melbourne/UNIMELB/Research/Complexity Project/KS-IC/Code/Behavioural-Analysis"

## quartz_off_screen
## 2
mathOpt <- cor.test(mathMerge$AccuracyKnapsack, mathMerge$AccuracyMath,
                    method = "pearson") #spearman is non-parametric
CANTAB Task
#CANTAB Data import
dataCantab <- importCantabInfo(folderDataCantab)
names(dataCantab)[names(dataCantab) == "subject.ID"] <- "pID"
dataCantab <- merge(dataCantab, partLog, by = "pID")
#Subsetting the relevant versions
dataCantab <- dplyr::filter(dataCantab, ExpVersion %in% versions)
# Subsetting only completed Tests
dataCantabWork <- dataCantab[dataCantab$PAL.Recommended.Standard.Extended.Status == "COMPLETED", ]
#Variables of interest
vInterest <- c("pID","PALFAMS28","PALTEA28","SSPFSL","SWMS","SWMBE4","SWMBE6","SWMBE8","SWMBE12","SWMBE468","SWMTE4","SWMTE6","SWMTE8","SWMTE12","SWMTE468")
dataCantabWork <- dataCantabWork[, vInterest]
#Generates new measure for SWM: difficulty-weighted sums of error counts
#(the 12-box term is deliberately commented out).
SWMweights <- c(4, 3, 2, 1)
dataCantabWork$SWMTEweightedJP <- dataCantabWork$SWMTE4 * SWMweights[1] + dataCantabWork$SWMTE6 * SWMweights[2] + dataCantabWork$SWMTE8 * SWMweights[3] #+ dataCantabWork$SWMTE12 * SWMweights[4]
dataCantabWork$SWMBEweightedJP <- dataCantabWork$SWMBE4 * SWMweights[1] + dataCantabWork$SWMBE6 * SWMweights[2] + dataCantabWork$SWMBE8 * SWMweights[3] #+ dataCantabWork$SWMBE12 * SWMweights[4]
#Drops unwanted CANTAB Tasks
dropC <- c("SWMBE4","SWMBE6","SWMBE8","SWMBE12","SWMBE468","SWMTE4","SWMTE6","SWMTE8","SWMTE12","SWMTE468")
dataCantabWork <- dataCantabWork[, !(names(dataCantabWork) %in% dropC)]
#Actual variable list to be used for correlation analysis with knapsack performance measures.
variables <- names(dataCantabWork)[-1]
#Analysis of each variable against mean Decision KS accuracy.
#Fix: seq_along() replaces 1:length() (which yields c(1, 0) on empty input);
#the result list is preallocated instead of grown.
decResults <- vector("list", length(variables))
for (i in seq_along(variables)) {
  summaryVar <- dataCantabWork[, c("pID", variables[i])]
  #summaryVar=dataCantabWork[,c("subject.ID","SSPFSL")]
  names(summaryVar)[1] <- "pID"
  decResults[[i]] <- indDiffAnalysis(summaryVar, dataAllDec)
}
## [1] "PALFAMS28"
##
## Pearson's product-moment correlation
##
## data: varMerge$AccuracyKnapsack and varMerge[[varName]]
## t = -0.48043, df = 18, p-value = 0.6367
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.5287144 0.3472938
## sample estimates:
## cor
## -0.1125195

## [1] "PALTEA28"
##
## Pearson's product-moment correlation
##
## data: varMerge$AccuracyKnapsack and varMerge[[varName]]
## t = 0.67781, df = 18, p-value = 0.5065
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.3061315 0.5611094
## sample estimates:
## cor
## 0.1577611

## [1] "SSPFSL"
##
## Pearson's product-moment correlation
##
## data: varMerge$AccuracyKnapsack and varMerge[[varName]]
## t = -0.0814, df = 18, p-value = 0.936
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.4578171 0.4269626
## sample estimates:
## cor
## -0.01918253

## [1] "SWMS"
##
## Pearson's product-moment correlation
##
## data: varMerge$AccuracyKnapsack and varMerge[[varName]]
## t = -1.5439, df = 18, p-value = 0.14
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.6813704 0.1184952
## sample estimates:
## cor
## -0.3419566

## [1] "SWMTEweightedJP"
##
## Pearson's product-moment correlation
##
## data: varMerge$AccuracyKnapsack and varMerge[[varName]]
## t = -0.70894, df = 18, p-value = 0.4874
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.5660508 0.2995542
## sample estimates:
## cor
## -0.1648142

## [1] "SWMBEweightedJP"
##
## Pearson's product-moment correlation
##
## data: varMerge$AccuracyKnapsack and varMerge[[varName]]
## t = -0.82745, df = 18, p-value = 0.4188
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.5844390 0.2743335
## sample estimates:
## cor
## -0.1914261

#Analysis of each variable against mean Optimisation KS accuracy.
#Fix: seq_along() replaces 1:length() (which yields c(1, 0) on empty input);
#the result list is preallocated instead of grown.
optResults <- vector("list", length(variables))
for (i in seq_along(variables)) {
  summaryVar <- dataCantabWork[, c("pID", variables[i])]
  names(summaryVar)[1] <- "pID"
  optResults[[i]] <- indDiffAnalysis(summaryVar, dataAllOpt)
}
## [1] "PALFAMS28"
##
## Pearson's product-moment correlation
##
## data: varMerge$AccuracyKnapsack and varMerge[[varName]]
## t = 0.82498, df = 18, p-value = 0.4202
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.2748634 0.5840616
## sample estimates:
## cor
## 0.1908739

## [1] "PALTEA28"
##
## Pearson's product-moment correlation
##
## data: varMerge$AccuracyKnapsack and varMerge[[varName]]
## t = -0.74953, df = 18, p-value = 0.4632
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.5724234 0.2909486
## sample estimates:
## cor
## -0.1739711

## [1] "SSPFSL"
##
## Pearson's product-moment correlation
##
## data: varMerge$AccuracyKnapsack and varMerge[[varName]]
## t = 0.91143, df = 18, p-value = 0.3741
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.2563090 0.5970615
## sample estimates:
## cor
## 0.2100344

## [1] "SWMS"
##
## Pearson's product-moment correlation
##
## data: varMerge$AccuracyKnapsack and varMerge[[varName]]
## t = -1.5912, df = 18, p-value = 0.129
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.6869391 0.1081608
## sample estimates:
## cor
## -0.3511681

## [1] "SWMTEweightedJP"
##
## Pearson's product-moment correlation
##
## data: varMerge$AccuracyKnapsack and varMerge[[varName]]
## t = -1.458, df = 18, p-value = 0.1621
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.6710119 0.1372686
## sample estimates:
## cor
## -0.3249937

## [1] "SWMBEweightedJP"
##
## Pearson's product-moment correlation
##
## data: varMerge$AccuracyKnapsack and varMerge[[varName]]
## t = -1.4186, df = 18, p-value = 0.1731
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.6661452 0.1458928
## sample estimates:
## cor
## -0.3171001

#' Standard error of a Pearson correlation estimate.
#'
#' @param cortest An object from cor.test(); uses $estimate (r) and the
#'   degrees of freedom stored in $parameter.
#' @return sqrt((1 - r^2) / df) as an unnamed numeric scalar.
pearson_se = function(cortest){
  r_squared <- cortest$estimate^2
  unname(sqrt((1 - r_squared) / cortest$parameter))
}
# Summary table: correlation of each cognitive measure with knapsack accuracy
# (decision and optimisation variants): r, p-value, df, and SE of r.
cogTasksTable <- data.frame(Task = c("Mental Arithmetic"),
                            KS_Dec_corr = c(mathDec$estimate),
                            KS_Dec_Pvalue = c(mathDec$p.value),
                            KS_Dec_df = c(mathDec$parameter),
                            KS_Dec_se = c(pearson_se(mathDec)),
                            KS_Opt_corr = c(mathOpt$estimate),
                            KS_Opt_Pvalue = c(mathOpt$p.value),
                            KS_Opt_df = c(mathOpt$parameter),
                            KS_Opt_se = c(pearson_se(mathOpt)),
                            stringsAsFactors = FALSE, row.names = NULL)
# Fix: seq_along() replaces 1:length() (robust if `variables` is empty).
# Growing with rbind() is acceptable for this handful of rows.
for (i in seq_along(variables)) {
  row <- list(variables[i],
              decResults[[i]]$corrTest$estimate,
              decResults[[i]]$corrTest$p.value,
              decResults[[i]]$corrTest$parameter,
              pearson_se(decResults[[i]]$corrTest),
              optResults[[i]]$corrTest$estimate,
              optResults[[i]]$corrTest$p.value,
              optResults[[i]]$corrTest$parameter,
              pearson_se(optResults[[i]]$corrTest))
  cogTasksTable <- rbind(cogTasksTable, row)
}
title <- "Cognitive and Knapsack Abilities Correlations"
outputName <- "cogr01"
exportTable(cogTasksTable, title, outputType, outputName)
[1] “cogr01”
|
|
|
Task
|
KS_Dec_corr
|
KS_Dec_Pvalue
|
KS_Dec_df
|
KS_Dec_se
|
KS_Opt_corr
|
KS_Opt_Pvalue
|
KS_Opt_df
|
KS_Opt_se
|
|
|
|
Mental Arithmetic
|
0.022
|
0.926
|
18
|
0.236
|
0.359
|
0.120
|
18
|
0.220
|
|
PALFAMS28
|
-0.113
|
0.637
|
18
|
0.234
|
0.191
|
0.420
|
18
|
0.231
|
|
PALTEA28
|
0.158
|
0.507
|
18
|
0.233
|
-0.174
|
0.463
|
18
|
0.232
|
|
SSPFSL
|
-0.019
|
0.936
|
18
|
0.236
|
0.210
|
0.374
|
18
|
0.230
|
|
SWMS
|
-0.342
|
0.140
|
18
|
0.221
|
-0.351
|
0.129
|
18
|
0.221
|
|
SWMTEweightedJP
|
-0.165
|
0.487
|
18
|
0.232
|
-0.325
|
0.162
|
18
|
0.223
|
|
SWMBEweightedJP
|
-0.191
|
0.419
|
18
|
0.231
|
-0.317
|
0.173
|
18
|
0.224
|
|
|
Refer to “tables.Rmd” for the code on how to combine the tables.